Set the working dir
setwd("/mnt/picea/projects/spruce/htuominen/27_SpruceTimeSeries")
Libs
suppressPackageStartupMessages(library(parallel))
suppressPackageStartupMessages(library(DESeq2))
suppressPackageStartupMessages(library(RColorBrewer))
suppressPackageStartupMessages(library(scatterplot3d))
suppressPackageStartupMessages(library(pander))
suppressPackageStartupMessages(library(gplots))
suppressPackageStartupMessages(library(venneuler))
suppressPackageStartupMessages(library(VennDiagram))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(lubridate))
Load helper files
source("~/Git/UPSCb/src/R/densityPlot.R")
source("~/Git/UPSCb/src/R/plot.multidensity.R")
source("~/Git/UPSCb/src/R/plotMA.R")
suppressPackageStartupMessages(source("~/Git/UPSCb/src/R/volcanoPlot.R"))
Clean the search path
detach("package:limma")
Palettes
pal <- brewer.pal(3,"Dark2")
cols <- brewer.pal(8,"Dark2")
my_palette <- colorRampPalette(c("blue", "yellow", "red"))(n = 99)
Some defaults
alpha <- 0.01
Read the sample information
samples <- read.csv("~/Git/UPSCb/projects/spruce-wood-time-series/doc/samplesmod.csv")
Set the result directory
outdir <- file.path("analysis/HTSeq/","20161216")
Read the raw data matrix
count.table <- read.csv(file.path(outdir,"raw-unormalised-data.csv"),row.names = 1)
normalised data
load(file="analysis/vst_blind-and-aware_results.rda")
Extract some gene subsets
png("phloem_xylem_Venn.png",width=800,height=800,pointsize = 34)
plot.new()
grid.draw(venn.diagram(list(
rownames(count.table)[rowSums(count.table[,match(samples$Filename[ samples$Tissue == "Phloem" ],colnames(count.table))] >10 ) >0],
rownames(count.table)[rowSums(count.table[,match(samples$Filename[ samples$Tissue == "Xylem" ],colnames(count.table))] >10 ) >0]
),
filename=NULL,
col=pal[1:2],
category.names=c("Phloem","Xylem")))
dev.off()
## png
## 2
Extract the varied set of genes from the Venn Diagram
Phloem.genes <- setdiff(
rownames(count.table)[rowSums(count.table[,match(samples$Filename[ samples$Tissue == "Phloem" ],colnames(count.table))] >10 ) >0],
rownames(count.table)[rowSums(count.table[,match(samples$Filename[ samples$Tissue == "Xylem" ],colnames(count.table))] >10 ) >0])
write.csv2(Phloem.genes,"Phloem.genes")
Phloem.genesb <- setdiff(
rownames(count.table)[rowSums(count.table[,match(samples$Filename[ samples$Tissue == "Phloem" ],colnames(count.table))] >0 ) >0],
rownames(count.table)[rowSums(count.table[,match(samples$Filename[ samples$Tissue == "Xylem" ],colnames(count.table))] >0 ) >0])
write.csv2(Phloem.genesb,"Phloem.genesb")
Xylem.genes <- setdiff(
rownames(count.table)[rowSums(count.table[,match(samples$Filename[ samples$Tissue == "Xylem" ],colnames(count.table))] >10 ) >0],
rownames(count.table)[rowSums(count.table[,match(samples$Filename[ samples$Tissue == "Phloem" ],colnames(count.table))] >10 ) >0])
write.csv2(Xylem.genes,"Xylem.genes")
Xylem.genesb <- setdiff(
rownames(count.table)[rowSums(count.table[,match(samples$Filename[ samples$Tissue == "Xylem" ],colnames(count.table))] >0 ) >0],
rownames(count.table)[rowSums(count.table[,match(samples$Filename[ samples$Tissue == "Phloem" ],colnames(count.table))] >0 ) >0])
write.csv2(Xylem.genesb,"Xylem.genesb")
Common.genes <- intersect(
rownames(count.table)[rowSums(count.table[,match(samples$Filename[ samples$Tissue == "Phloem" ],colnames(count.table))] >10 ) >0],
rownames(count.table)[rowSums(count.table[,match(samples$Filename[ samples$Tissue == "Xylem" ],colnames(count.table))] >10 ) >0])
write.csv2(Common.genes,"Common.genes")
Common.genesb <- intersect(
rownames(count.table)[rowSums(count.table[,match(samples$Filename[ samples$Tissue == "Phloem" ],colnames(count.table))] >0 ) >0],
rownames(count.table)[rowSums(count.table[,match(samples$Filename[ samples$Tissue == "Xylem" ],colnames(count.table))] >0 ) >0])
write.csv2(Common.genesb,"Common.genesb")
mysamplessec <-c("P825_210","P825_211","P825_212","P825_213","P825_214","P825_215")
pc <- prcomp(t(bst[,mysamplessec]))
percent <- round(summary(pc)$importance[2,]*100)
Plot the PCA 3 first dimensions
colours <- ifelse(
samples$Date[match(mysamplessec,samples$Filename)]=="July-4-2011",
cols[1],
cols[3])
mar=c(5.1,4.1,4.1,2.1)
scatterplot3d(pc$x[,1],
pc$x[,2],
pc$x[,3],
xlab=paste("Comp. 1 (",percent[1],"%)",sep=""),
ylab=paste("Comp. 2 (",percent[2],"%)",sep=""),
zlab=paste("Comp. 3 (",percent[3],"%)",sep=""),
color=colours,
pch=19)
par(mar=mar)
The first two dims
plot(pc$x[,1],
pc$x[,2],
xlab=paste("Comp. 1 (",percent[1],"%)",sep=""),
ylab=paste("Comp. 2 (",percent[2],"%)",sep=""),
col=colours,
pch=19,
main="Principal Component Analysis",sub="regularized log transformed counts")
The 2nd and 3rd dims
plot(pc$x[,2],
pc$x[,3],
xlab=paste("Comp. 2 (",percent[2],"%)",sep=""),
ylab=paste("Comp. 3 (",percent[3],"%)",sep=""),
col=colours,
pch=19,
main="Principal Component Analysis",sub="regularized log transformed counts")
mysamplesfour <-c("P825_213","P825_214","P825_215","P825_216","P825_217","P825_218","P825_219","P825_220","P825_221")
pc <- prcomp(t(bst[,mysamplesfour]))
percent <- round(summary(pc)$importance[2,]*100)
colours <- ifelse(
samples$Dev.keyword[match(mysamplesfour,samples$Filename)]=="ALW",
cols[3],
cols[1])
Plot the PCA 3 first dimensions
mar=c(5.1,4.1,4.1,2.1)
scatterplot3d(pc$x[,1],
pc$x[,2],
pc$x[,3],
xlab=paste("Comp. 1 (",percent[1],"%)",sep=""),
ylab=paste("Comp. 2 (",percent[2],"%)",sep=""),
zlab=paste("Comp. 3 (",percent[3],"%)",sep=""),
color=colours,
pch=19)
par(mar=mar)
The first two dims
plot(pc$x[,1],
pc$x[,2],
xlab=paste("Comp. 1 (",percent[1],"%)",sep=""),
ylab=paste("Comp. 2 (",percent[2],"%)",sep=""),
col=colours,
pch=19,
main="Principal Component Analysis",sub="regularized log transformed counts")
The 2nd and 3rd dims
plot(pc$x[,2],
pc$x[,3],
xlab=paste("Comp. 2 (",percent[2],"%)",sep=""),
ylab=paste("Comp. 3 (",percent[3],"%)",sep=""),
col=colours,
pch=19,
main="Principal Component Analysis",sub="regularized log transformed counts")
The data
mysamples <-c("P1789_101","P1789_102","P1789_103","P825_201","P825_202","P825_203")
conditions <- factor(samples$Sampling[match(mysamples,samples$Filename)])
dds <- DESeqDataSetFromMatrix(
countData = count.table[,mysamples],
colData = data.frame(condition=conditions),
design = ~ condition)
DE
dds <- suppressMessages(DESeq(dds))
plotDispEsts(dds)
res <- results(dds)
Subset to what is expressed
res <- res[!is.na(res$padj),]
Summary plots and stats
DESeq2::plotMA(res,alpha=alpha)
## Loading required package: LSD
volcanoPlot(res,alpha=alpha)
hist(res$padj,breaks=seq(0,1,.01))
sum(res$padj<alpha,na.rm=TRUE)
## [1] 3920
Select significant genes
signf <- res[res$padj<alpha,]
signf[order(abs(signf$log2FoldChange),decreasing=TRUE),]
## log2 fold change (MLE): condition 2 vs 1
## Wald test p-value: condition 2 vs 1
## DataFrame with 3920 rows and 6 columns
## baseMean log2FoldChange lfcSE
## <numeric> <numeric> <numeric>
## MA_10426936g0010 210.737693226111 11.8190732411238 1.20185300934184
## MA_10434197g0020 108.814189905748 10.8666288398743 1.27509819362707
## MA_10305558g0010 24.2009155239864 8.69757541707638 2.46179215450166
## MA_10427363g0010 49.8597981185811 -8.4614838304044 1.30372030389021
## MA_10427976g0010 39.65158822378 8.43979794769816 1.34542174130604
## ... ... ... ...
## MA_3965g0010 1079.52295756837 0.398234819661555 0.117087461576487
## MA_93349g0010 2899.99347961133 0.396853104687475 0.109100444386052
## MA_34468g0010 1502.3418043754 0.386684551425778 0.117924263246282
## MA_230579g0020 4594.60522962362 -0.38021065809169 0.113919975592726
## MA_109428g0010 1958.92287881819 -0.367635578827592 0.110524790653625
## stat pvalue
## <numeric> <numeric>
## MA_10426936g0010 9.83404222417863 8.03282859540954e-23
## MA_10434197g0020 8.52218981580055 1.56564519470911e-17
## MA_10305558g0010 3.53302588976568 0.000410832237273278
## MA_10427363g0010 -6.49026006970662 8.56883270876647e-11
## MA_10427976g0010 6.27297574328284 3.54212107851745e-10
## ... ... ...
## MA_3965g0010 3.40117391136204 0.000670971265655539
## MA_93349g0010 3.63750218361357 0.000275294859138649
## MA_34468g0010 3.27909236641314 0.00104141543648597
## MA_230579g0020 -3.3375240480298 0.000845284067860572
## MA_109428g0010 -3.32627256431301 0.000880158154627298
## padj
## <numeric>
## MA_10426936g0010 7.43139630057375e-20
## MA_10434197g0020 7.63357819258172e-15
## MA_10305558g0010 0.00466126639019492
## MA_10427363g0010 9.75279129754872e-09
## MA_10427976g0010 3.36315075033973e-08
## ... ...
## MA_3965g0010 0.00684440013142546
## MA_93349g0010 0.00341915267391479
## MA_34468g0010 0.00967162649894822
## MA_230579g0020 0.00820938066444399
## MA_109428g0010 0.00846131553967853
signf[order(signf$padj),]
## log2 fold change (MLE): condition 2 vs 1
## Wald test p-value: condition 2 vs 1
## DataFrame with 3920 rows and 6 columns
## baseMean log2FoldChange lfcSE
## <numeric> <numeric> <numeric>
## MA_5724g0010 630.761815264725 -6.28553515305548 0.321079236787829
## MA_9035900g0010 2549.55689068474 -1.92163673134283 0.118049961202798
## MA_99200g0020 678.984077105495 -2.28683965459406 0.144111991834503
## MA_10430173g0010 302.093931938286 -4.95679959013011 0.317461019285263
## MA_10426614g0010 3001.42346463857 -1.62621476136164 0.112068685739876
## ... ... ... ...
## MA_577287g0010 214.915306722229 0.666238432759879 0.203827504344114
## MA_10377389g0010 27.9521843344055 -1.58011491195999 0.483429740851731
## MA_830263g0010 40.7335622014 -1.42229419998622 0.435211213968145
## MA_4334518g0010 170.368310404605 -0.718355032090491 0.219822225601262
## MA_10428032g0010 1511.90920722306 0.417578738800713 0.12781097370283
## stat pvalue
## <numeric> <numeric>
## MA_5724g0010 -19.5762741183074 2.46397637390161e-85
## MA_9035900g0010 -16.2781648698863 1.41035269051356e-59
## MA_99200g0020 -15.8684896758644 1.04730857198299e-56
## MA_10430173g0010 -15.6138841905376 5.85606499242756e-55
## MA_10426614g0010 -14.5108756351106 1.03392242447161e-47
## ... ... ...
## MA_577287g0010 3.2686385230675 0.00108066264797963
## MA_10377389g0010 -3.26855130835778 0.00108099576058064
## MA_830263g0010 -3.26805503704306 0.00108289305468411
## MA_4334518g0010 -3.26789081552438 0.00108352156764756
## MA_10428032g0010 3.26715873217282 0.00108632752326719
## padj
## <numeric>
## MA_5724g0010 8.89002675703699e-81
## MA_9035900g0010 2.54427625368646e-55
## MA_99200g0020 1.2595631092382e-52
## MA_10430173g0010 5.28217062316966e-51
## MA_10426614g0010 7.46078421498715e-44
## ... ...
## MA_577287g0010 0.00995666709374492
## MA_10377389g0010 0.00995719352610407
## MA_830263g0010 0.00997212389305841
## MA_4334518g0010 0.00997536569551516
## MA_10428032g0010 0.00999864720394907
write.csv2(signf,"signf.csv")
conditions <- samples$Dev.keyword[match(mysamplesfour,samples$Filename)]
dds <- suppressMessages(DESeqDataSetFromMatrix(
countData = count.table[,mysamplesfour],
colData = data.frame(condition=conditions),
design = ~ condition))
DE
dds <- suppressMessages(DESeq(dds))
plotDispEsts(dds)
res <- results(dds)
Subset to what is expressed
res <- res[!is.na(res$padj),]
Summary plots and stats
DESeq2::plotMA(res,alpha=alpha)
volcanoPlot(res,alpha=alpha)
hist(res$padj,breaks=seq(0,1,.01))
sum(res$padj<alpha,na.rm=TRUE)
## [1] 555
Select significant genes
signf1 <- as.data.frame(res)[res$padj<alpha,]
write.csv2(signf1,"signf1.csv")
conditions <- samples$Date[match(mysamplessec,samples$Filename)]
dds <- suppressMessages(DESeqDataSetFromMatrix(
countData = count.table[,mysamplessec],
colData = data.frame(condition=conditions),
design = ~ condition))
DE
dds <- suppressMessages(DESeq(dds))
plotDispEsts(dds)
res <- results(dds,contrast=c("condition","July-11-2011", "July-4-2011"))
Subset to what is expressed
res <- res[!is.na(res$padj),]
Summary plots and stats
DESeq2::plotMA(res,alpha=alpha)
volcanoPlot(res,alpha=alpha)
hist(res$padj,breaks=seq(0,1,.01))
sum(res$padj<alpha,na.rm=TRUE)
## [1] 738
Select significant genes
signf2 <- as.data.frame(res)[res$padj<alpha,]
write.csv2(signf2,"signf2.csv")
79 overlapping gene models between comparisons
overlap <- intersect(rownames(signf1),rownames(signf2))
write.csv2(overlap,"overlap.csv")
v <- venneuler(c(A=640, B=467, 'A&B'=79))
plot(v)
ligninge2 <- c("MA_15852g0010","MA_10429279g0010","MA_44561g0010","MA_123220g0010","MA_118702g0010",
"MA_130482g0010","MA_14460g0010","MA_10426159g0010","MA_10430515g0010",
"MA_8946476g0010","MA_70509g0010","MA_633407g0010","MA_56692g0010","MA_109119g0010","MA_106573g0010",
"MA_109033g0010","MA_10435234g0010","MA_175723g0010","MA_137442g0010",
"MA_10055697g0010","MA_334200g0010","MA_9103099g0010","MA_10433066g0010","MA_7247276g0010","MA_10432446g0030",
"MA_9992421g0010","MA_59638g0010","MA_10435850g0010","MA_13550g0010","MA_10427193g0010","MA_10435550g0030",
"MA_28222g0010","MA_10427075g0010","MA_10434709g0010","MA_53288g0010","MA_10429973g0020",
"MA_10434848g0010","MA_24539g0010","MA_10432870g0010","MA_158072g0010","MA_10430254g0010",
"MA_52987g0020","MA_61321g0010","MA_17680g0010","MA_54958g0010","MA_109548g0010",
"MA_99750g0010","MA_10425888g0010","MA_10432610g0020","MA_949441g0010","MA_16731g0010","MA_10427515g0010",
"MA_362678g0010","MA_6931g0010","MA_9065834g0010","MA_222430g0010","MA_832501g0010","MA_137109g0010",
"MA_166604g0010","MA_68461g0010","MA_10435631g0010","MA_9446650g0010","MA_10435810g0010","MA_10436663g0010",
"MA_29397g0010","MA_46269g0010","MA_10426788g0020","MA_76956g0010","MA_165197g0010","MA_363801g0010",
"MA_10432099g0010","MA_10436313g0010","MA_104401g0010","MA_10427985g0010",
"MA_423264g0010","MA_10436231g0020","MA_18380g0010","MA_135446g0010","MA_135525g0010","MA_10432110g0010",
"MA_93032g0010","MA_9470692g0010","MA_62368g0010","MA_87599g0010","MA_47348g0010",
"MA_811078g0010","MA_170004g0010","MA_67291g0010","MA_132857g0010","MA_14717g0010","MA_10434090g0010",
"MA_185257g0010","MA_192464g0010","MA_57140g0010","MA_192698g0010",
"MA_57140g0020","MA_76578g0010","MA_10434864g0010","MA_10432715g0010","MA_10427033g0010","MA_118833g0010",
"MA_75861g0010","MA_33057g0010","MA_66348g0010","MA_122769g0010","MA_28768g0010","MA_115430g0010",
"MA_14240g0010","MA_205016g0010","MA_6005856g0010","MA_10430775g0010","MA_10435976g0010","MA_186345g0010",
"MA_10432379g0020","MA_256822g0010","MA_66808g0010","MA_185360g0010","MA_74620g0010","MA_656798g0010",
"MA_124869g0010",
"MA_140596g0010","MA_108425g0010","MA_109058g0010","MA_10227622g0010","MA_125040g0020","MA_45356g0010",
"MA_10431506g0010","MA_235485g0010","MA_111431g0010","MA_10434028g0020","MA_3486g0010",
"MA_96757g0010","MA_91956g0010","MA_10425995g0010","MA_10437025g0010",
"MA_177017g0010","MA_10427936g0010","MA_10432809g0020","MA_195775g0010","MA_6833274g0010",
"MA_412995g0010","MA_183305g0010","MA_41416g0010","MA_10g0010",
"MA_166386g0010","MA_9873048g0010","MA_941794g0010","MA_119796g0010","MA_132958g0020",
"MA_10428075g0010","MA_10431507g0020",
"MA_10435488g0020","MA_444g0010","MA_5316g0010",
"MA_10433564g0020","MA_83406g0010","MA_91294g0020","MA_93867g0010",
"MA_10426168g0010","MA_10434825g0010","MA_10429595g0010")
xylemsamples <- c("P1789_101","P1789_102","P1789_103","P825_201","P825_202","P825_203","P825_204","P825_205","P825_206",
"P825_207","P825_208","P825_209","P825_210","P825_211","P825_212","P825_213","P825_214","P825_215",
"P825_216","P825_217","P825_218","P825_219","P825_220","P825_221","P825_222","P825_223","P825_224",
"P825_225","P825_226","P825_228","P825_229","P825_230","P825_231","P825_232","P825_233","P825_234",
"P825_236","P825_237","P825_238","P825_239","P825_240","P825_241","P825_242","P825_243","P825_244",
"P825_245","P825_246","P825_247","P825_248","P825_249","P825_250","P825_251","P825_252","P825_253",
"P825_254","P825_255","P825_256","P825_257","P825_258","P825_259","P825_260","P825_261","P825_262",
"P825_263","P825_264")
ligningmatrix2 <- scale(bst[ligninge2,xylemsamples])
colnames(ligningmatrix2) <- samples[match(colnames(ligningmatrix2),samples$Filename),"Date"]
pdf("~/Git/UPSCb/projects/spruce-wood-time-series/doc/heat_lignin.pdf", width = 24, height = 13, onefile = FALSE)
my.breaks <- c(seq(-3,3,length.out=100))
# create heatmap
heatmap.2(ligningmatrix2,Colv=NA,col=my_palette,keysize=0.7,density.info="none",
trace="none",cexRow=0.7,cexCol=0.7,margins=c(7,7),breaks=my.breaks,
dendrogram = "row")
dev.off()
## png
## 2
lgenes <-data.frame("lgenes" = c("MA_6931g0010","MA_109548g0010","MA_130482g0010","MA_118702g0010",
"MA_10432099g0010","MA_9446650g0010","MA_87599g0010","MA_106573g0010",
"MA_362678g0010","MA_123220g0010","MA_56692g0010","MA_10436231g0020","MA_10435810g0010","MA_10427515g0010","MA_104401g0010",
"MA_10433066g0010","MA_61321g0010","MA_62368g0010","MA_91956g0010","MA_6833274g0010","MA_412995g0010","MA_76578g0010","MA_57140g0010",
"MA_192464g0010","MA_33057g0010","MA_6005856g0010","MA_175723g0010","MA_118833g0010",
"MA_205016g0010","MA_10434090g0010","MA_10432715g0010"),
"family" = c("CCoAOMT","C3H","C4H","C4H","COMT","CCR","CSE","HCT","CCoAOMT","PAL","4CL","CAD","CCR2","C3H2",
"CAD","C3H2","F5H","CSE2","PRX","PRX","PRX","LAC","LAC","LAC","LAC","LAC","CYP","LAC","LAC","LAC","LAC"),
"cluster" = paste("Cluster", c(rep(1, 11), rep(2, 7), rep(3, 13))),
stringsAsFactors=FALSE)
cluter_family_map <- tapply(lgenes$family, lgenes$cluster, function(x){unique(x)})
lgenes$lty <- apply(lgenes,1,function(x){
which(cluter_family_map[[x[3]]] %in% x[2])
})
samples <- samples[match(colnames(count.table),samples$Filename),]
cbPalette <- c("gray47","saddlebrown","blue3", "brown3", "chartreuse3", "cadetblue3", "cyan3", "paleturquoise4", "aquamarine3",
"deeppink3","darkseagreen3","darkorchid3","red3","gray22","yellow3","dodgerblue3",
"burlywood4","plum3","salmon","violetred3","springgreen3","slategray4","peru","darkslategrey","darkorange3",
"midnightblue","yellowgreen","forestgreen","palevioletred","goldenrod4","royalblue")
plotCandidatesDate <- function(lgenes){
subdf <- as.data.frame(bst[lgenes[,1],])
colnames(subdf) <- samples$ID
subdf <- as.data.frame(cbind(subdf,lgenes))
sub.bst <- melt(subdf, id.vars = c("lgenes", "family", "cluster", "lty")) #Convert to row major data frame format
sub.bst[,5] <- as.character(sub.bst[,5])
d <- mdy(as.character(samples$Date)[match(sub.bst[,5],samples$ID)])
o <- order(d)
d.dmy <- paste(day(d),month(d),year(d),sep="-")
sub.bst$Date <- factor(d.dmy, levels=unique(d.dmy[o]))
sub.bst$Tissue <- factor(samples[match(sub.bst[,5],samples$ID),"Tissue"])
sub.bst$Filename <- samples[match(sub.bst[,5],samples$ID),"Filename"]
colnames(sub.bst) <- c("Gene","Family","Cluster","Line","Sample","Expression",
"Date","Tissue","Filename") #Set column names
line_labels <- lapply(
lapply(
tapply(
sub.bst$Family, sub.bst$Line, paste),
unique),
function(f){
l<-length(f)
paste("C", seq(l),":",f,sep="")
}
)
line_labels <- unlist(lapply(line_labels,paste,collapse=", "))
sub.bst <- sub.bst[sub.bst$Tissue=="Xylem",]
sub.bst$Family_Gene <- paste(sub.bst$Family,sub.bst$Gene,sep="_")
pl <- ggplot(sub.bst, aes(x=Date,group=Gene,y=Expression)) + #Base layer for plot
stat_summary(fun.y = mean,geom = "line", lwd=1,
aes(color = Gene, lty = factor(Line))) +
scale_colour_manual(values=cbPalette) +
scale_linetype(name = "Cluster:Family", labels = line_labels) +
theme(axis.text.x = element_text(angle = 70, hjust = 1)) +
facet_grid(Cluster~., scales="free_y") +
guides(col = guide_legend(nrow = 3 ),
lty = guide_legend(nrow = 2, keywidth = unit(2, "cm"))) +
theme(legend.position = "bottom", panel.background = element_blank(),
panel.grid = element_blank(), panel.border = element_rect(fill=NA)) +
scale_x_discrete(expand = c(0, 0))
return(pl)
}
plotCandidatesDate(lgenes)
lgenes <-data.frame("lgenes" = c("MA_6931g0010","MA_109548g0010","MA_130482g0010","MA_118702g0010",
"MA_10432099g0010","MA_9446650g0010","MA_87599g0010","MA_106573g0010",
"MA_362678g0010","MA_123220g0010","MA_56692g0010","MA_10436231g0020","MA_10435810g0010","MA_10427515g0010","MA_104401g0010",
"MA_10433066g0010","MA_61321g0010","MA_62368g0010","MA_91956g0010","MA_6833274g0010","MA_412995g0010","MA_76578g0010","MA_57140g0010",
"MA_192464g0010","MA_33057g0010","MA_6005856g0010","MA_175723g0010","MA_118833g0010",
"MA_205016g0010","MA_10434090g0010","MA_10432715g0010"),
"family" = c("CCoAOMT","C3H","C4H","C4H","COMT","CCR","CSE","HCT","CCoAOMT","PAL","4CL","CAD","CCR2","C3H2",
"CAD","C3H2","F5H","CSE2","PRX","PRX","PRX","LAC","LAC","LAC","LAC","LAC","CYP","LAC","LAC","LAC","LAC"),
"cluster" = paste("Cluster", c(rep(1, 11), rep(2, 7), rep(3, 13))),
stringsAsFactors=FALSE)
samples <- samples[match(colnames(count.table),samples$Filename),]
cbPalette <- c("gray47","saddlebrown","midnightblue", "chartreuse3", "dodgerblue3",
"deeppink3","darkorchid3","coral3","gray22","goldenrod3",
"plum4","aquamarine3","khaki3","azure3","darkorange3",
"cyan3","royalblue")
plotCandidatesDate <- function(lgenes){
subdf <- as.data.frame(bst[lgenes[,1],])
colnames(subdf) <- samples$ID
subdf <- as.data.frame(cbind(subdf,lgenes))
sub.bst <- melt(subdf, id.vars = c("lgenes", "family", "cluster")) #Convert to row major data frame format
sub.bst[,4] <- as.character(sub.bst[,4])
d <- mdy(as.character(samples$Date)[match(sub.bst[,4],samples$ID)])
o <- order(d)
d.dmy <- paste(day(d),month(d),year(d),sep="-")
sub.bst$Date <- factor(d.dmy, levels=unique(d.dmy[o]))
sub.bst$Tissue <- factor(samples[match(sub.bst[,4],samples$ID),"Tissue"])
sub.bst$Filename <- samples[match(sub.bst[,4],samples$ID),"Filename"]
colnames(sub.bst) <- c("Gene","Family","Cluster","Sample","Expression",
"Date","Tissue","Filename") #Set column names
sub.bst <- sub.bst[sub.bst$Tissue=="Xylem",]
sub.bst$Family_Gene <- paste(sub.bst$Family,sub.bst$Gene,sep="_")
pl <- ggplot(sub.bst, aes(x=Date,group=Gene,y=Expression)) + #Base layer for plot
stat_summary(fun.y = mean,geom = "line", lwd=1,
aes(color = Family)) +
scale_colour_manual(values=cbPalette) +
theme(axis.text.x = element_text(angle = 70, hjust = 1), panel.background = element_blank(),
panel.grid = element_blank(), panel.border = element_rect(fill=NA)) +
scale_x_discrete(expand = c(0, 0)) +
facet_grid(Cluster~., scales="free_y")
return(pl)
}
pdf("~/Git/UPSCb/projects/spruce-wood-time-series/doc/plot_lignin.pdf", width = 8, height = 13, onefile = FALSE)
plotCandidatesDate(lgenes)
dev.off()
## png
## 2
totlignin = data.frame(lignin = c(20.62453,
25.25811,
25.48501,
25.0127,
25.67378,
22.68023,
25.49668,
24.72025,
26.28046,
19.71202,
23.84836,
21.26355,
26.84147,
23.36291,
23.99029,
26.25053,
24.63209,
24.90654,
24.88728,
25.60002,
24.32007,
29.56139,
25.52557,
27.0483,
25.4805,
23.26183,
30.42753,
25.148),
daynro = c(1,
1,
1,
16,
16,
16,
30,
30,
30,
57,
57,
87,
87,
87,
115,
115,
115,
155,
155,
155,
184,
184,
184,
219,
219,
247,
247,
247
)
)
lig.mod1 = lm(lignin ~ daynro, data = totlignin)
totlignin = data.frame(lignin = c(28.98292,
29.93209,
29.56214,
29.92202,
31.63774,
28.73988,
31.18209,
31.2291,
31.0205,
28.40422,
31.1398,
30.20112,
27.27941,
28.30992,
28.76324,
27.94533,
27.34749,
28.74167,
27.46825,
26.82445,
26.51981,
25.6461,
27.2398,
27.35525,
32.74898,
28.80224,
27.01786,
28.14662,
30.69162,
31.16474,
29.54658,
27.76631,
30.06107,
29.52142,
25.75068
),
daynro = c(1,
1,
1,
15,
15,
15,
30,
30,
30,
45,
45,
72,
72,
72,
102,
102,
102,
130,
130,
130,
170,
170,
170,
199,
199,
199,
234,
234,
234,
262,
262,
262,
274,
274,
274
)
)
lig.mod2 = lm(lignin ~ daynro, data = totlignin)
markers <-data.frame("markers" = c("MA_9897324g0010",
"MA_10020g0010",
"MA_915221g0010",
"MA_106416g0010",
"MA_920994g0010",
"MA_325398g0010"
),
"family" = c("APL","EXPA","EXPA","CDKB","CDKB","CDKB"
),
stringsAsFactors=FALSE)
samples <- samples[match(colnames(count.table),samples$Filename),]
cbPalette <- c("azure3","saddlebrown","blue3", "salmon","deeppink3","darkorchid3")
Two functions to define the error bar y positions for the standard error of the mean
sem_ymin <- function(x){
m <- mean(x)
s <- sd(x)
se <- s/sqrt(length(x))
return(m - se)
}
sem_ymax <- function(x){
m <- mean(x)
s <- sd(x)
se <- s/sqrt(length(x))
return(m + se)
}
plotCandidatesDate <- function(markers){
subdf <- as.data.frame(bst[markers[,1],])
colnames(subdf) <- samples$ID
subdf <- as.data.frame(cbind(subdf,markers))
sub.bst <- melt(subdf) #Convert to row major data frame format
sub.bst[,3] <- as.character(sub.bst[,3])
d <- mdy(as.character(samples$Date)[match(sub.bst[,3],samples$ID)])
o <- order(d)
d.dmy <- paste(day(d),month(d),year(d),sep="-")
sub.bst$Date <- factor(d.dmy, levels=unique(d.dmy[o]))
sub.bst$Tissue <- factor(samples[match(sub.bst[,3],samples$ID),"Tissue"])
sub.bst$Filename <- samples[match(sub.bst[,3],samples$ID),"Filename"]
colnames(sub.bst) <- c("Gene","Family","Sample","Expression","Date","Tissue","Filename") #Set column names
sub.bst$Family_Gene <- paste(sub.bst$Family,sub.bst$Gene,sep="_")
pl <- ggplot(sub.bst, aes(x=Date,group=Gene,y=Expression)) + #Base layer for plot
stat_summary(fun.y = mean,geom = "line", lwd=1, aes(color = Gene)) + #Add line for the mean
stat_summary(fun.y = mean, fun.ymin = sem_ymin, fun.ymax = sem_ymax)+ #Add error bars and mean dots#Put all genes in separate facets
scale_colour_manual(values=cbPalette) +
theme(axis.text.x = element_text(angle = 70, hjust = 1)) +
facet_grid(Family~Tissue,scales="free")#Rotate axis text
return(pl)
}
suppressMessages(plotCandidatesDate(markers))
## Warning: Removed 6 rows containing missing values (geom_pointrange).
markers <-data.frame("markers" = c("MA_10427616g0010",
"MA_10020g0010",
"MA_915221g0010",
"MA_183130g0010",
"MA_140410g0010",
"MA_10429177g0010"
),
"family" = c("AHP6","EXPA","EXPA","CESA","CESA","CESA"
),
stringsAsFactors=FALSE)
samples <- samples[match(colnames(count.table),samples$Filename),]
cbPalette <- c("yellow3","gray47","plum4","cyan3","forestgreen","darkorange3"
)
plotCandidatesDate <- function(markers){
subdf <- as.data.frame(bst[markers[,1],])
colnames(subdf) <- samples$ID
subdf <- as.data.frame(cbind(subdf,markers))
sub.bst <- melt(subdf) #Convert to row major data frame format
sub.bst[,3] <- as.character(sub.bst[,3])
d <- mdy(as.character(samples$Date)[match(sub.bst[,3],samples$ID)])
o <- order(d)
d.dmy <- paste(day(d),month(d),year(d),sep="-")
sub.bst$Date <- factor(d.dmy, levels=unique(d.dmy[o]))
sub.bst$Tissue <- factor(samples[match(sub.bst[,3],samples$ID),"Tissue"])
sub.bst$Filename <- samples[match(sub.bst[,3],samples$ID),"Filename"]
colnames(sub.bst) <- c("Gene","Family","Sample","Expression","Date","Tissue","Filename") #Set column names
sub.bst$Family_Gene <- paste(sub.bst$Family,sub.bst$Gene,sep="_")
pl <- ggplot(sub.bst, aes(x=Date,group=Gene,y=Expression)) + #Base layer for plot
stat_summary(fun.y = mean,geom = "line", lwd=1, aes(color = Gene)) + #Add line for the mean
stat_summary(fun.y = mean, fun.ymin = sem_ymin, fun.ymax = sem_ymax)+ #Add error bars and mean dots#Put all genes in separate facets
scale_colour_manual(values=cbPalette) +
theme(axis.text.x = element_text(angle = 70, hjust = 1)) +
facet_grid(Family~Tissue,scales="free")#Rotate axis text
return(pl)
}
suppressMessages(plotCandidatesDate(markers))
## Warning: Removed 6 rows containing missing values (geom_pointrange).
gen <-data.frame("gen" = c("MA_10434782g0020",
"MA_9483804g0010",
"MA_101790g0010",
"MA_17311g0010",
"MA_7091g0010"
),
"Family" = c("AS2","MYB16","MYB4","MYB4","MYB4"
),
stringsAsFactors=FALSE)
samples <- samples[match(colnames(count.table),samples$Filename),]
cbPalette <- c("gray47","deeppink3","dodgerblue3","peru","yellowgreen")
Two functions to define the error bar y positions for the standard error of the mean
sem_ymin <- function(x){
m <- mean(x)
s <- sd(x)
se <- s/sqrt(length(x))
return(m - se)
}
sem_ymax <- function(x){
m <- mean(x)
s <- sd(x)
se <- s/sqrt(length(x))
return(m + se)
}
plotCandidatesDate <- function(gen){
subdf <- as.data.frame(bst[gen[,1],])
colnames(subdf) <- samples$ID
subdf <- as.data.frame(cbind(subdf,gen))
sub.bst <- melt(subdf) #Convert to row major data frame format
sub.bst[,3] <- as.character(sub.bst[,3])
d <- mdy(as.character(samples$Date)[match(sub.bst[,3],samples$ID)])
o <- order(d)
d.dmy <- paste(day(d),month(d),year(d),sep="-")
sub.bst$Date <- factor(d.dmy, levels=unique(d.dmy[o]))
sub.bst$Tissue <- factor(samples[match(sub.bst[,3],samples$ID),"Tissue"])
sub.bst$Filename <- samples[match(sub.bst[,3],samples$ID),"Filename"]
colnames(sub.bst) <- c("Gene","Family","Sample","Expression","Date","Tissue","Filename") #Set column names
sub.bst <- sub.bst[sub.bst$Tissue=="Xylem",]
sub.bst$Family_Gene <- paste(sub.bst$Family,sub.bst$Gene,sep="_")
pl <- ggplot(sub.bst, aes(x=Date,group=Gene,y=Expression)) + #Base layer for plot
stat_summary(fun.y = mean,geom = "line", lwd=1, aes(color = Gene)) + #Add line for the mean
stat_summary(fun.y = mean, fun.ymin = sem_ymin, fun.ymax = sem_ymax)+ #Add error bars and mean dots#Put all genes in separate facets
scale_colour_manual(values=cbPalette) +
theme(axis.text.x = element_text(angle = 70, hjust = 1), panel.background = element_blank(),
panel.grid = element_blank(), panel.border = element_rect(fill=NA),aspect.ratio=0.6) +
scale_x_discrete(expand = c(0, 0.6))+
facet_grid(.~ Family, scales="free_y")
return(pl)
}
suppressMessages(plotCandidatesDate(gen))
## Warning: Removed 5 rows containing missing values (geom_pointrange).
gen <-data.frame("gen" = c("MA_121578g0010",
"MA_400747g0010",
"MA_5386467g0010"
),
"Family" = c("AIL1","FTL1","FTL2"
),
stringsAsFactors=FALSE)
samples <- samples[match(colnames(count.table),samples$Filename),]
cbPalette <- c("gray47","deeppink3","dodgerblue3","peru")
Two functions to define the error bar y positions for the standard error of the mean
sem_ymin <- function(x){
m <- mean(x)
s <- sd(x)
se <- s/sqrt(length(x))
return(m - se)
}
sem_ymax <- function(x){
m <- mean(x)
s <- sd(x)
se <- s/sqrt(length(x))
return(m + se)
}
plotCandidatesDate <- function(gen){
subdf <- as.data.frame(bst[gen[,1],])
colnames(subdf) <- samples$ID
subdf <- as.data.frame(cbind(subdf,gen))
sub.bst <- melt(subdf) #Convert to row major data frame format
sub.bst[,3] <- as.character(sub.bst[,3])
d <- mdy(as.character(samples$Date)[match(sub.bst[,3],samples$ID)])
o <- order(d)
d.dmy <- paste(day(d),month(d),year(d),sep="-")
sub.bst$Date <- factor(d.dmy, levels=unique(d.dmy[o]))
sub.bst$Tissue <- factor(samples[match(sub.bst[,3],samples$ID),"Tissue"])
sub.bst$Filename <- samples[match(sub.bst[,3],samples$ID),"Filename"]
colnames(sub.bst) <- c("Gene","Family","Sample","Expression","Date","Tissue","Filename") #Set column names
sub.bst <- sub.bst[sub.bst$Tissue=="Phloem",]
sub.bst$Family_Gene <- paste(sub.bst$Family,sub.bst$Gene,sep="_")
pl <- ggplot(sub.bst, aes(x=Date,group=Gene,y=Expression)) + #Base layer for plot
stat_summary(fun.y = mean,geom = "line", lwd=1, aes(color = Gene)) + #Add line for the mean
stat_summary(fun.y = mean, fun.ymin = sem_ymin, fun.ymax = sem_ymax)+ #Add error bars and mean dots#Put all genes in separate facets
scale_colour_manual(values=cbPalette) +
theme(axis.text.x = element_text(angle = 70, hjust = 1), panel.background = element_blank(),
panel.grid = element_blank(), panel.border = element_rect(fill=NA),aspect.ratio=0.6) +
scale_x_discrete(expand = c(0, 0.6))+
facet_grid(.~ Family, scales="free_y")
return(pl)
}
suppressMessages(plotCandidatesDate(gen))
Expression of the putative latewood partition 2:2:1:X of the core network
gen <-data.frame("gen" = c("MA_131708g0010","MA_470613g0010","MA_10430216g0010",
"MA_10973g0010",
"MA_10432073g0010",
"MA_112831g0010",
"MA_90487g0010",
"MA_379710g0010",
"MA_371148g0010",
"MA_10358874g0010",
"MA_10435604g0010",
"MA_401686g0010",
"MA_9761124g0010",
"MA_10350802g0010",
"MA_10305558g0010",
"MA_10432173g0020",
"MA_2720g0020",
"MA_10274939g0010",
"MA_191789g0010",
"MA_23596g0020",
"MA_2720g0010",
"MA_4044619g0010",
"MA_4509352g0010",
"MA_92862g0010",
"MA_10053838g0010",
"MA_23596g0010",
"MA_9147880g0010",
"MA_9198822g0010",
"MA_10435961g0020",
"MA_104992g0010",
"MA_36564g0010",
"MA_8059983g0010",
"MA_10436313g0020",
"MA_9337419g0010",
"MA_10430511g0010",
"MA_10429479g0010",
"MA_8553148g0010",
"MA_4719g0010",
"MA_10435508g0010",
"MA_68461g0010",
"MA_6025923g0010",
"MA_827389g0010",
"MA_9822402g0010",
"MA_38411g0010",
"MA_9922406g0010",
"MA_10435508g0020",
"MA_517867g0010",
"MA_10431130g0010",
"MA_18929g0020",
"MA_14041g0010",
"MA_10437278g0030",
"MA_10428184g0010",
"MA_10432594g0020",
"MA_1115518g0010",
"MA_176882g0010",
"MA_10432701g0010",
"MA_147583g0010",
"MA_413510g0010",
"MA_11806g0010",
"MA_57712g0010",
"MA_673539g0010",
"MA_10435107g0010",
"MA_904g0010",
"MA_9171580g0010",
"MA_81147g0010",
"MA_436575g0010",
"MA_10435847g0010",
"MA_10427283g0030",
"MA_10436587g0020",
"MA_66608g0010",
"MA_10426545g0010",
"MA_10426545g0020",
"MA_10144850g0010",
"MA_1854g0020",
"MA_216719g0010",
"MA_19115g0010",
"MA_90677g0010",
"MA_62006g0010",
"MA_1356g0010",
"MA_444g0010",
"MA_224485g0010",
"MA_10435055g0010",
"MA_53796g0010",
"MA_10435495g0010",
"MA_216312g0010",
"MA_158751g0010",
"MA_10426903g0010",
"MA_131887g0010",
"MA_133989g0010",
"MA_10437025g0010",
"MA_6150g0020",
"MA_782885g0010",
"MA_496233g0010",
"MA_180698g0010",
"MA_10430663g0010",
"MA_10429174g0010",
"MA_10431010g0010",
"MA_5534358g0010",
"MA_9540939g0010",
"MA_49713g0010",
"MA_2012364g0010",
"MA_46805g0010",
"MA_629925g0010",
"MA_896685g0010",
"MA_408380g0010",
"MA_96669g0010",
"MA_946576g0010",
"MA_55891g0010",
"MA_309477g0010",
"MA_10429296g0020",
"MA_10433716g0010",
"MA_10436513g0010",
"MA_379082g0010",
"MA_563604g0010",
"MA_10430629g0010",
"MA_890224g0010",
"MA_109285g0010",
"MA_5590688g0010",
"MA_183426g0010",
"MA_81001g0010",
"MA_14242g0010",
"MA_10428802g0010"
),
"family" = c("MA_131708g0010",rep("other",121)),
stringsAsFactors=FALSE)
samples <- samples[match(colnames(count.table),samples$Filename),]
cbPalette <- c("deeppink3",rep("azure3",121))
plotCandidatesDate <- function(gen){
subdf <- as.data.frame(bst[gen[,1],])
colnames(subdf) <- samples$ID
subdf <- as.data.frame(cbind(subdf,gen))
sub.bst <- melt(subdf) #Convert to row major data frame format
sub.bst[,3] <- as.character(sub.bst[,3])
d <- mdy(as.character(samples$Date)[match(sub.bst[,3],samples$ID)])
o <- order(d)
d.dmy <- paste(day(d),month(d),year(d),sep="-")
sub.bst$Date <- factor(d.dmy, levels=unique(d.dmy[o]))
sub.bst$Tissue <- factor(samples[match(sub.bst[,3],samples$ID),"Tissue"])
sub.bst$Filename <- samples[match(sub.bst[,3],samples$ID),"Filename"]
colnames(sub.bst) <- c("Gene","Family","Sample","Expression","Date","Tissue","Filename") #Set column names
sub.bst <- sub.bst[sub.bst$Tissue=="Xylem",]
sub.bst$Family_Gene <- paste(sub.bst$Family,sub.bst$Gene,sep="_")
pl <- ggplot(sub.bst, aes(x=Date,group=Gene,y=Expression)) + #Base layer for plot
stat_summary(fun.y = mean,geom = "line", lwd=0.7, aes(color = Family)) + #Add line for the mean
scale_colour_manual(values=cbPalette)+
theme(axis.text.x = element_text(angle = 70, hjust = 1), panel.background = element_blank(),
panel.grid = element_blank(), panel.border = element_rect(fill=NA),aspect.ratio=0.6)
return(pl)
}
suppressMessages(plotCandidatesDate(gen))
## R version 3.5.0 (2018-04-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/openblas-base/libblas.so.3
## LAPACK: /usr/lib/libopenblasp-r0.2.18.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=en_US.UTF-8
## [9] LC_ADDRESS=en_US.UTF-8 LC_TELEPHONE=en_US.UTF-8
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=en_US.UTF-8
##
## attached base packages:
## [1] grid stats4 parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] LSD_4.0-0 lubridate_1.7.4
## [3] ggplot2_3.0.0 data.table_1.11.4
## [5] VennDiagram_1.6.20 futile.logger_1.4.3
## [7] venneuler_1.1-0 rJava_0.9-10
## [9] gplots_3.0.1 pander_0.6.2
## [11] scatterplot3d_0.3-41 RColorBrewer_1.1-2
## [13] DESeq2_1.20.0 SummarizedExperiment_1.10.1
## [15] DelayedArray_0.6.2 BiocParallel_1.14.2
## [17] matrixStats_0.54.0 Biobase_2.40.0
## [19] GenomicRanges_1.32.6 GenomeInfoDb_1.16.0
## [21] IRanges_2.14.10 S4Vectors_0.18.3
## [23] BiocGenerics_0.26.0
##
## loaded via a namespace (and not attached):
## [1] bitops_1.0-6 bit64_0.9-7 rprojroot_1.3-2
## [4] tools_3.5.0 backports_1.1.2 R6_2.2.2
## [7] rpart_4.1-13 KernSmooth_2.23-15 Hmisc_4.1-1
## [10] DBI_1.0.0 lazyeval_0.2.1 colorspace_1.3-2
## [13] nnet_7.3-12 withr_2.1.2 tidyselect_0.2.4
## [16] gridExtra_2.3 bit_1.1-14 compiler_3.5.0
## [19] formatR_1.5 htmlTable_1.12 labeling_0.3
## [22] caTools_1.17.1.1 scales_0.5.0 checkmate_1.8.5
## [25] genefilter_1.62.0 stringr_1.3.1 digest_0.6.15
## [28] foreign_0.8-71 rmarkdown_1.10 XVector_0.20.0
## [31] base64enc_0.1-3 pkgconfig_2.0.1 htmltools_0.3.6
## [34] limma_3.36.2 htmlwidgets_1.2 rlang_0.2.1
## [37] rstudioapi_0.7 RSQLite_2.1.1 bindr_0.1.1
## [40] gtools_3.8.1 acepack_1.4.1 dplyr_0.7.6
## [43] RCurl_1.95-4.11 magrittr_1.5 GenomeInfoDbData_1.1.0
## [46] Formula_1.2-3 Matrix_1.2-14 Rcpp_0.12.18
## [49] munsell_0.5.0 stringi_1.2.4 yaml_2.2.0
## [52] zlibbioc_1.26.0 plyr_1.8.4 blob_1.1.1
## [55] gdata_2.18.0 crayon_1.3.4 lattice_0.20-35
## [58] splines_3.5.0 annotate_1.58.0 locfit_1.5-9.1
## [61] knitr_1.20 pillar_1.3.0 reshape2_1.4.3
## [64] geneplotter_1.58.0 futile.options_1.0.1 XML_3.98-1.12
## [67] glue_1.3.0 evaluate_0.11 latticeExtra_0.6-28
## [70] lambda.r_1.2.3 gtable_0.2.0 purrr_0.2.5
## [73] assertthat_0.2.0 xtable_1.8-2 survival_2.42-6
## [76] tibble_1.4.2 AnnotationDbi_1.42.1 memoise_1.1.0
## [79] bindrcpp_0.2.2 cluster_2.0.7-1